#set working directory
#setwd("~/file_path")

#NBDA --------------------------------------------------------------------

library(geosphere)
library(stringr)

source("NBDA code 1.2.13.R")

#generate OADA for each diffusion
counter <- 1
while(counter <= 3){
  
  #load the appropriate datasets
  if(counter == 1){
    load("amen_brother/years.RData")
    load("amen_brother/discogs_ids.RData")
    load("amen_brother/matrix.RData")
    load("amen_brother/artist_info.RData")
    load("amen_brother/spotify_data.RData")
    load("amen_brother/geo_thesaurus.RData")
  }
  if(counter == 2){
    load("think_about_it/years.RData")
    load("think_about_it/discogs_ids.RData")
    load("think_about_it/matrix.RData")
    load("think_about_it/artist_info.RData")
    load("think_about_it/spotify_data.RData")
    load("think_about_it/geo_thesaurus.RData")
  }
  if(counter == 3){
    load("funky_drummer/years.RData")
    load("funky_drummer/discogs_ids.RData")
    load("funky_drummer/matrix.RData")
    load("funky_drummer/artist_info.RData")
    load("funky_drummer/spotify_data.RData")
    load("funky_drummer/geo_thesaurus.RData")
  }
  
  years_raw <- years
  
  years <- c()
  weights <- c()
  i <- 1
  while(i <= length(rownames(matrix))){
    years_tally <- c()
    j <- 1
    while(j <= length(discogs_ids)){
      if(rownames(matrix)[i] %in% unlist(discogs_ids[j])){
        years_tally <- c(years_tally, years_raw[j])
      }
      j <- j + 1
    }
    weights <- c(weights, length(years_tally))
    years <- c(years, min(as.numeric(years_tally)))
    i <- i + 1
  }
  
  gender <- c()
  popularity <- c()
  followers <- c()
  i <- 1
  while(i <= length(rownames(matrix))){
    gender <- c(gender, artist_info$gender[match(rownames(matrix)[i], unlist(artist_info$discogs))])
    popularity <- c(popularity, spotify_data$popularity[match(rownames(matrix)[i], unlist(artist_info$discogs))])
    followers <- c(followers, spotify_data$followers[match(rownames(matrix)[i], unlist(artist_info$discogs))])
    i <- i + 1
  }
  
  #female = -1; male = 1; 0 = NA or other
  gender[which(gender == 1)] <- -1
  gender[which(gender == 2)] <- 1
  gender[which(gender == 3)] <- 0
  gender[which(gender == 4)] <- 0
  gender[is.na(gender)] <- 0
  
  #retrieve lon and lat for each artist
  lon <- c()
  lat <- c()
  i <- 1
  while(i <= length(rownames(matrix))){
    lon <- c(lon, geo_thesaurus$lon[match(rownames(matrix)[i], geo_thesaurus$id)])
    lat <- c(lat, geo_thesaurus$lat[match(rownames(matrix)[i], geo_thesaurus$id)])
    i <- i + 1
  }
  
  coords <- matrix(c(lon, lat), nrow = length(lon), ncol = 2)
  
  #calculate mean distance of each point from all other points (in meters) on an ellipsoid, excluding individuals without geographic info...
  i <- 1
  meandist <- c()
  while(i <= length(lon)){
    if(!is.na(lon[i])){
      temp <- coords[which(!is.na(lon[-1])) + 1, ]
      meandist <- c(meandist, mean(distGeo(coords[i,], temp)))
    }
    else{
      meandist <- c(meandist, NA)
    }
    i <- i + 1
  }
  
  #center and scale continuous variables
  popularity <- c(scale(popularity, center = TRUE, scale = FALSE))
  followers <- c(scale(followers, center = TRUE, scale = FALSE))
  meandist <- c(scale(meandist, center = TRUE, scale = FALSE))
  
  ind_vars <- cbind(gender, popularity, followers, meandist)
  
  rownames(matrix) <- NULL
  colnames(matrix) <- NULL
  
  all_years <- names(table(sort(years)))
  
  orders <- c()
  i <- 1
  while(i <= length(years)){
    orders <- c(orders, which(all_years %in% years[i]))
    i <- i + 1
  }
  
  orders_nbda <- c()
  i <- 1
  while(i <= length(table(orders))){
    orders_nbda <- c(orders_nbda, which(orders == i))
    i <- i + 1
  }
  
  #create the appropriate oaData object
  if(counter == 1){
    amen_brother_oa <- oaData(assMatrix = matrix, asoc = ind_vars, orderAcq = orders_nbda, groupid = "1", weights = weights)
  }
  if(counter == 2){
    think_about_it_oa <- oaData(assMatrix = matrix, asoc = ind_vars, orderAcq = orders_nbda, groupid = "2", weights = weights)
  }
  if(counter == 3){
    funky_drummer_oa <- oaData(assMatrix = matrix, asoc = ind_vars, orderAcq = orders_nbda, groupid = "3", weights = weights)
  }
  
  #remove everything before next run...
  remove(list = c("artist_info", "coords", "discogs_ids", "geo_thesaurus", "ind_vars", "matrix", "spotify_data", "temp", "years_raw", "years_tally", "all_years", "followers", "gender", "i", "j", "lat", "lon", "meandist", "orders", "orders_nbda", "popularity", "years"))
  
  cat(counter, " is done! ", sep = "")
  
  counter <- counter + 1
}

#social parameters
sParamMatrix <- rbind(c(1, 1, 1))

#run all possible combinations of models
aic_table <- aicTableOA(oadata = c("amen_brother_oa","funky_drummer_oa","think_about_it_oa"), asocialVar = c(1, 2, 3, 4), group = FALSE, task = FALSE, sParamMatrix = sParamMatrix, aic = "aicc", pure = FALSE)

#run two best fitting models to get estimates for individual-level variables
multiplicative_1234 <- multiCoxFit(oadata = c("amen_brother_oa","funky_drummer_oa","think_about_it_oa"), bounded = FALSE, sParam = c(sParamMatrix), formula = ~.+gender+popularity+followers+meandist)
